*** How worried should we be? The implications of fabricated survey data for political science
*** Figures A2 and A3. Differences in Proportion Selecting Center and Extremes of Scale

* Session setup: switch off output paging, change to the data folder and load
* the survey file.
set more off

* Point the path on the next line at the folder that holds the dataset.
cd "C:\~\Downloads\"

use "VEN_fraud_data.dta", clear

* env2b and drk1 each come as two source columns (suffix 1 and suffix 2).
* When one column holds the code 999999 the combined variable takes the value
* of the other column; when neither column is 999999 it is left missing.
* NOTE(review): 999999 presumably marks the version of the item a respondent
* was not asked - confirm against the codebook.
foreach stem in env2b drk1 {
	gen `stem' = cond(`stem'2 == 999999, `stem'1, cond(`stem'1 == 999999, `stem'2, .))
}

* Per-question summary files, one row per survey item.
*   questions_sc.dta   : question name and the largest value observed after
*                        recoding, stored as scale.
*   questions_skew.dta : question name with skewness and kurtosis of the
*                        recoded item among observations with clean_data == 1.
* The item list is written once and reused so the two passes cannot drift apart.
local allq l1 venl2 venl3 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c e5 e15 e3 e16 ///
d1 d2 d3 d4 d5 d6 vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3 ///
ls3 soct2 idio2 cp6 cp7 cp8 cp13 cp20 it1 aoj11 aoj12 polz1a m1 m2 drk1 env2b pn4 pn5 lib1 lib2b lib2c ///
lib4 fear11 pol1 vengrp1 vengrp2 vengrp3 vengrp4 mil10a1 mil10e1 mil10un1 mil10un2 mil10a2 mil10e2 venesc2b venesc3 ///
sd2new2 sd3new2 sd6new2 venprot10 venprot12 venprot11 mil10oas1 mil10oas2

* Scale of each item.
* A handle left open by an interrupted earlier run would make postfile fail,
* so release it first; capture ignores the error when no handle is open.
capture postclose qdata
postfile qdata str32 question scale using "questions_sc.dta", replace
foreach x of varlist `allq' {
	di "`x'"
	* Codes 888888, 988888, 999999 and 99 are set to missing; the cleaned copy
	* gets the suffix _rs and is what every later step works with.
	recode `x' (888888 988888 999999 99 = .), gen(`x'_rs)
	qui summ `x'_rs
	post qdata ("`x'") (r(max))
}
postclose qdata

* skew and kurtosis
capture postclose qdata
postfile qdata str32 question skew kurtosis using "questions_skew.dta", replace
foreach x of varlist `allq' {
	di "`x'"
	qui summ `x'_rs if clean_data == 1, detail
	post qdata ("`x'") (r(skewness)) (r(kurtosis))
}
postclose qdata
** % that select center
* For every item, build a 0/1 indicator for choosing the scale midpoint and
* test whether the share choosing it differs across likelyfraud groups within
* the matched sample (cem_matched == 1). One row per item is posted to
* questions_selectcenter.dta: proportion and N of each group plus the z statistic.
* The posted columns call prtest group 1 clean and group 2 fraud.
*
* Items are grouped by the response value treated as the midpoint:
*   center4 -> value 4, center5 -> value 5, center3 -> value 3.
local center4 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c ///
vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3
local center5 d1 d2 d3 d4 d5 d6 e15 e16 e3 e5 l1 venl2 venl3
local center3 m1 m2 polz1a venesc2b venesc3 vengrp1 vengrp2 venprot10

* Release a handle left open by an interrupted earlier run, if any.
capture postclose qdata
postfile qdata str32 question clean_pr_sc clean_n_sc fraud_pr_sc fraud_n_sc z_sc using "questions_selectcenter.dta", replace
foreach mid in 4 5 3 {
	foreach x of varlist `center`mid'' {
		di "`x'"
		* 1 = chose the midpoint, 0 = any other valid answer, missing otherwise.
		* missing() also excludes extended missing values, which a plain
		* comparison with . would have counted as a valid non-midpoint answer.
		gen `x'_selectc = (`x'_rs == `mid') if !missing(`x'_rs)
		egen `x'_pc_selectc = pc(`x'_selectc), prop
		* NOTE(review): the stored results are read as r(P_1), r(N_1), r(P_2),
		* r(N_2); newer Stata releases may document these as r(P1), r(N1), ... -
		* confirm the names under the Stata version used to run this file.
		prtest `x'_selectc if cem_matched == 1, by(likelyfraud)
		post qdata ("`x'") (r(P_1)) (r(N_1)) (r(P_2)) (r(N_2)) (r(z))
	}
}
postclose qdata
** % that select min/max
* For every item, build a 0/1 indicator for choosing either end of the scale
* (value 1 or the top value) and test whether the share doing so differs across
* likelyfraud groups within the matched sample (cem_matched == 1). One row per
* item is posted to questions_selectminmax.dta: proportion and N of each group
* plus the z statistic. The posted columns call prtest group 1 clean and
* group 2 fraud.
*
* Items are grouped by the value treated as the top of the scale:
*   ends7 -> 7, ends10 -> 10, ends5 -> 5.
local ends7 b1 b2 b3 b4 b6 b43 b11 b12 b13 b18 b21 b21a b32 b37 b47a venb11 venb10 venb51 venvb18 venvb19 venvb20 pr3dn pr3en polz1 ///
ros1 ros4 ing4 eff1 eff2 vengrp15 vengrp16 aoj22new media3 media4 media4b media1 media2 media2b dst1b env1c ///
vengrp5 vengrp6 vengrp7 vengrp8 vengrp9 vengrp10 vengrp11 vengrp12 vengrp13 vengrp14 venps1 venps2 venps3 venct1 venct2 venct3
local ends10 d1 d2 d3 d4 d5 d6 e15 e16 e3 e5 l1 venl2 venl3
local ends5 m1 m2 polz1a venesc2b venesc3 vengrp1 vengrp2 venprot10

* Release a handle left open by an interrupted earlier run, if any.
capture postclose qdata
postfile qdata str32 question clean_pr_mm clean_n_mm fraud_pr_mm fraud_n_mm z_mm using "questions_selectminmax.dta", replace
foreach top in 7 10 5 {
	foreach x of varlist `ends`top'' {
		di "`x'"
		* 1 = chose value 1 or the top value, 0 = any other valid answer,
		* missing otherwise (extended missing values included).
		gen `x'_selectminmax = inlist(`x'_rs, 1, `top') if !missing(`x'_rs)
		* The share variable is computed from the min/max indicator itself;
		* it previously read the select-center indicator by mistake.
		egen `x'_pc_selectminmax = pc(`x'_selectminmax), prop
		* NOTE(review): the stored results are read as r(P_1), r(N_1), r(P_2),
		* r(N_2); newer Stata releases may document these as r(P1), r(N1), ... -
		* confirm the names under the Stata version used to run this file.
		prtest `x'_selectminmax if cem_matched == 1, by(likelyfraud)
		post qdata ("`x'") (r(P_1)) (r(N_1)) (r(P_2)) (r(N_2)) (r(z))
	}
}
postclose qdata

* Assemble the per-question table: start from the scale file and attach the
* other three summary files one at a time, matching on question.
use questions_sc.dta, clear
foreach piece in questions_selectcenter questions_selectminmax questions_skew {
	merge 1:1 question using `piece'.dta, nogen
}

** diff of prop in selecting center
* Order rows by scale (ascending) then question name (descending). The row
* number in that order becomes the numeric id, and the label is the scale
* followed by pt_v and the row number; scales other than 3, 4, 5, 7 and 10
* get an empty label.
gsort scale -question
gen question_label = string(scale) + "pt_v" + string(_n) if inlist(scale, 3, 4, 5, 7, 10)
gen q_id = _n
* NOTE(review): labmask is not a built-in command; presumably it comes from the
* user-written labutil package on SSC - confirm it is installed.
labmask q_id, values(question_label)

* Only items whose scale is 7 or higher are kept for the figures. q_id is
* assigned before the drop, so the remaining ids do not start at 1.
drop if scale < 7

* Difference (clean minus fraud) in the proportion selecting the center, with
* two interval widths around it and matching significance flags.
* NOTE(review): 3.18467 and 2.97831 look like Bonferroni-adjusted normal
* critical values (the flag names end in bonf) - confirm how they were derived.
local crit   3.18467
local crit90 2.97831
* Unpooled standard error of the difference, substituted as text so each
* generated variable evaluates exactly the same expression.
local se_sc "sqrt(((clean_pr_sc*(1-clean_pr_sc))/clean_n_sc)+((fraud_pr_sc*(1-fraud_pr_sc))/fraud_n_sc))"

gen ciu_prop = clean_pr_sc - fraud_pr_sc + `crit' * (`se_sc')
gen cil_prop = clean_pr_sc - fraud_pr_sc - `crit' * (`se_sc')
gen ciu_prop90 = clean_pr_sc - fraud_pr_sc + `crit90' * (`se_sc')
gen cil_prop90 = clean_pr_sc - fraud_pr_sc - `crit90' * (`se_sc')
gen diff_prop = clean_pr_sc - fraud_pr_sc

* 1 when the absolute z statistic exceeds the critical value, 0 otherwise.
* A missing z compares greater than any number in Stata, so it is excluded
* explicitly; an undefined test is recorded as 0 instead of being flagged.
gen pr_diff_test_bonf = (abs(z_sc) > `crit') & !missing(z_sc)
gen pr_diff_test_bonf90 = (abs(z_sc) > `crit90') & !missing(z_sc)

* Figure A2
* Two side-by-side panels of the difference in the proportion selecting the
* center: panel 1 holds rows with q_id below 79, panel 2 the rest. Within a
* panel, points and bars are grey when pr_diff_test_bonf90 is 0 and red when it
* is 1. The thin bars span cil_prop to ciu_prop, the thick bars span cil_prop90
* to ciu_prop90. Each panel is saved to disk and the two are then combined.
* NOTE(review): the plotplain scheme is not built in; presumably it comes from
* a user-written scheme package - confirm it is installed.
forvalues panel = 1/2 {
	if `panel' == 1 {
		local rows  "q_id < 79"
		local ticks "45(1)78"
	}
	else {
		local rows  "q_id >= 79"
		local ticks "79(1)113"
	}
	local ns  "pr_diff_test_bonf90 == 0 & `rows'"
	local sig "pr_diff_test_bonf90 == 1 & `rows'"
	twoway ///
		(scatter q_id diff_prop if `ns', msymbol(circle) mcolor(gs8)) ///
		(pcbarrow q_id cil_prop q_id ciu_prop if `ns', mcolor(none) lcolor(gs8) lwidth(medthin)) ///
		(scatter q_id diff_prop if `sig', msymbol(circle) mcolor(red)) ///
		(pcbarrow q_id cil_prop q_id ciu_prop if `sig', mcolor(none) lcolor(red) lwidth(medthin)) ///
		(pcbarrow q_id cil_prop90 q_id ciu_prop90 if `ns', mcolor(none) lcolor(gs8) lwidth(medthick)) ///
		(pcbarrow q_id cil_prop90 q_id ciu_prop90 if `sig', mcolor(none) lcolor(red) lwidth(medthick)), ///
		legend(off) ytitle("") ylabel(`ticks', valuelabel angle(horizontal) labsize(small)) ///
		ysize(4) xsize(3) scheme(plotplain) xline(0) saving(diffsc`panel', replace)
}
graph combine diffsc1.gph diffsc2.gph, scheme(plotplain) row(1)


** diff of prop in selecting minmax
* Difference (clean minus fraud) in the proportion selecting either end of the
* scale, with two interval widths around it and matching significance flags.
* NOTE(review): 3.18467 and 2.97831 look like Bonferroni-adjusted normal
* critical values (the flag names end in bonf) - confirm how they were derived.
local crit   3.18467
local crit90 2.97831
* Unpooled standard error of the difference, substituted as text so each
* generated variable evaluates exactly the same expression.
local se_mm "sqrt(((clean_pr_mm*(1-clean_pr_mm))/clean_n_mm)+((fraud_pr_mm*(1-fraud_pr_mm))/fraud_n_mm))"

gen ciu_propm = clean_pr_mm - fraud_pr_mm + `crit' * (`se_mm')
gen cil_propm = clean_pr_mm - fraud_pr_mm - `crit' * (`se_mm')
gen ciu_propm90 = clean_pr_mm - fraud_pr_mm + `crit90' * (`se_mm')
gen cil_propm90 = clean_pr_mm - fraud_pr_mm - `crit90' * (`se_mm')
gen diff_propm = clean_pr_mm - fraud_pr_mm

* 1 when the absolute z statistic exceeds the critical value, 0 otherwise.
* A missing z compares greater than any number in Stata, so it is excluded
* explicitly; an undefined test is recorded as 0 instead of being flagged.
gen prmm_diff_test_bonf = (abs(z_mm) > `crit') & !missing(z_mm)
gen prmm_diff_test_bonf90 = (abs(z_mm) > `crit90') & !missing(z_mm)

* Figure A3
* Two side-by-side panels of the difference in the proportion selecting either
* end of the scale: panel 1 holds rows with q_id below 79, panel 2 the rest.
* Within a panel, points and bars are grey when prmm_diff_test_bonf90 is 0 and
* red when it is 1. The thin bars span cil_propm to ciu_propm, the thick bars
* span cil_propm90 to ciu_propm90. Each panel is saved to disk and the two are
* then combined.
* NOTE(review): the plotplain scheme is not built in; presumably it comes from
* a user-written scheme package - confirm it is installed.
forvalues panel = 1/2 {
	if `panel' == 1 {
		local rows  "q_id < 79"
		local ticks "45(1)78"
	}
	else {
		local rows  "q_id >= 79"
		local ticks "79(1)113"
	}
	local ns  "prmm_diff_test_bonf90 == 0 & `rows'"
	local sig "prmm_diff_test_bonf90 == 1 & `rows'"
	twoway ///
		(scatter q_id diff_propm if `ns', msymbol(circle) mcolor(gs8)) ///
		(pcbarrow q_id cil_propm q_id ciu_propm if `ns', mcolor(none) lcolor(gs8) lwidth(medthin)) ///
		(scatter q_id diff_propm if `sig', msymbol(circle) mcolor(red)) ///
		(pcbarrow q_id cil_propm q_id ciu_propm if `sig', mcolor(none) lcolor(red) lwidth(medthin)) ///
		(pcbarrow q_id cil_propm90 q_id ciu_propm90 if `ns', mcolor(none) lcolor(gs8) lwidth(medthick)) ///
		(pcbarrow q_id cil_propm90 q_id ciu_propm90 if `sig', mcolor(none) lcolor(red) lwidth(medthick)), ///
		legend(off) ytitle("") ylabel(`ticks', valuelabel angle(horizontal) labsize(small)) ///
		ysize(4) xsize(3) scheme(plotplain) xline(0) saving(diffmm`panel', replace)
}
graph combine diffmm1.gph diffmm2.gph, scheme(plotplain) row(1)

